Data Read-in

https://docs.google.com/spreadsheets/d/1nDXokcwtq_J8A6Uh7zAxsWW16MtcEC_1EiLstz4WGig/edit#gid=1782979571

Social_2022_rawNA_data <- readr::read_csv("DATA/Social Query 2022 NA Campaigns.csv") %>%
  clean_names() %>% 
  filter(date < '2022-07-01') %>% 
  filter(impressions_analyzed > 10)
Rows: 24117 Columns: 149
-- Column specification ---------------------------------------------------------------------
Delimiter: ","
chr   (4): olive_plan_name, platform, olive_placement_type, last_file_date
dbl  (99): olive_plan_id, olive_placement_id, brand_id, opid, _1_sec_in_view_impressions,...
lgl  (44): active_view_eligible_impressions, active_view_measurable_impressions, active_v...
date  (2): date, ingestion_date

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Social_2022_rawNA_lookup <- readr::read_csv("DATA/CampaignLookup.csv") %>%
  clean_names()
Rows: 9 Columns: 4
-- Column specification ---------------------------------------------------------------------
Delimiter: ","
chr (3): Name, Spend, Quarter
dbl (1): OPID

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Social_2022_MOAT_lookup = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1nDXokcwtq_J8A6Uh7zAxsWW16MtcEC_1EiLstz4WGig/edit#gid=1782979571") %>% 
  clean_names() %>% 
  filter(social == 'Y')
Auto-refreshing stale OAuth token.
√ Reading from Benchmark_Moat_Tile_2022.
√ Range Benchmark_Moat_Tile_2022.

Summary Stats

Placement types distinct


Social_2022_rawNA_data %>%
  group_by(platform,olive_placement_type) %>% 
  summarize (counts = n())
`summarise()` has grouped output by 'platform'. You can override using the `.groups` argument.
Social_2022_rawNA_data %>%
  group_by(platform,brand_id) %>% 
  summarize (counts = n()) %>%
  left_join(Social_2022_MOAT_lookup, by = 'brand_id')
`summarise()` has grouped output by 'platform'. You can override using the `.groups` argument.

Social_2022_processedNA_data <-
  Social_2022_rawNA_data %>%
  select(
    olive_plan_id:opid, impressions_analyzed,
    x2_sec_video_in_view_impressions, in_view_impressions, fully_on_screen_3sec_cumulative, player_vis_and_aud_on_complete_sum,
    valid_and_avoc, valid_and_viewable, valid_and_viewable_gm, valid_and_fully_on_screen_3sec_cumulative, valid_and_inview_3sec_cumulative
  ) %>%
  rename(platform_old = platform
         ) %>% 
  left_join(Social_2022_MOAT_lookup, by = 'brand_id'
  ) %>% 
  mutate(
    olive_plan_id = as.character(olive_plan_id),
    olive_placement_id = as.character(olive_placement_id),
    brand_id = as.character(brand_id) ,
    opid = as.character(opid)
  )

#Look at global moat

Social_2022_processedNA_data %>% 
  skim()
-- Data Summary ------------------------
                           Values    
Name                       Piped data
Number of rows             16137     
Number of columns          24        
_______________________              
Column type frequency:               
  character                13        
  Date                     1         
  numeric                  10        
________________________             
Group variables            None      

-- Variable type: character ---------------------------------------------------------------------------------------
# A tibble: 13 x 8
   skim_variable        n_missing complete_rate   min   max empty n_unique whitespace
 * <chr>                    <int>         <dbl> <int> <int> <int>    <int>      <int>
 1 olive_plan_id                0             1     5     5     0        8          0
 2 olive_plan_name              0             1    44    72     0        8          0
 3 platform_old                 0             1     6    18     0        7          0
 4 olive_placement_id           0             1     7     7     0      364          0
 5 olive_placement_type         0             1    15    27     0        3          0
 6 brand_id                     0             1     5     7     0       14          0
 7 opid                         0             1     7     7     0      364          0
 8 dataset_name                 0             1    20    51     0       14          0
 9 region                       0             1     2     6     0        2          0
10 channel                      0             1     5     7     0        2          0
11 social                       0             1     1     1     0        1          0
12 platform                     0             1     6     9     0        6          0
13 media_type                   0             1     5    15     0        4          0

-- Variable type: Date --------------------------------------------------------------------------------------------
# A tibble: 1 x 7
  skim_variable n_missing complete_rate min        max        median     n_unique
* <chr>             <int>         <dbl> <date>     <date>     <date>        <int>
1 date                  0             1 2022-01-18 2022-06-30 2022-03-03      164

-- Variable type: numeric -----------------------------------------------------------------------------------------
# A tibble: 10 x 11
   skim_variable                             n_missing complete_rate    mean      sd    p0   p25    p50     p75
 * <chr>                                         <int>         <dbl>   <dbl>   <dbl> <dbl> <dbl>  <dbl>   <dbl>
 1 impressions_analyzed                              0         1     209721. 518214.    11 2222  27191  166005 
 2 x2_sec_video_in_view_impressions               6641         0.588  56682. 191926.     0  255.  4414   26352.
 3 in_view_impressions                           10589         0.344 106745. 228837.     1  311. 39378  121618.
 4 fully_on_screen_3sec_cumulative                9578         0.406  39246. 135769.     0   41   1339   17771 
 5 player_vis_and_aud_on_complete_sum             8659         0.463    864.   2336.     0    0     43     452 
 6 valid_and_avoc                                 8659         0.463    862.   2322.     0    0     43     452 
 7 valid_and_viewable                              257         0.984  71189. 202864.     0  124   7672.  55972.
 8 valid_and_viewable_gm                         11095         0.312 190179. 444404.     0  149  41944  117986.
 9 valid_and_fully_on_screen_3sec_cumulative      9578         0.406  39245. 135767.     0   41   1339   17771 
10 valid_and_inview_3sec_cumulative               6221         0.614  19872.  83958.     0   57   1502   13010 
      p100 hist 
 *   <dbl> <chr>
 1 9704935 ▇▁▁▁▁
 2 6361782 ▇▁▁▁▁
 3 6120199 ▇▁▁▁▁
 4 5942621 ▇▁▁▁▁
 5   34734 ▇▁▁▁▁
 6   34040 ▇▁▁▁▁
 7 6361782 ▇▁▁▁▁
 8 7095033 ▇▁▁▁▁
 9 5942621 ▇▁▁▁▁
10 5942621 ▇▁▁▁▁

Plot Impression Level Data


p = Social_2022_processedNA_data %>% 
  ggplot() +
    aes(x = date, y = impressions_analyzed, color = olive_plan_name, fill = olive_plan_name) +
  xlab("Date") +
  ylab("Impressions") +
  ggtitle("Impressions by Campaign") + #fix size
geom_bar(stat = 'identity') +
  theme_bw() +
  theme(
     plot.title = element_text(size=22, hjust = 0.5),
     axis.title.y = element_blank()
  )

p

ggplotly(p)

Calculate V/V and AVOC Rates by Platform

Social_2022_SummarizedNA_date <-
Social_2022_processedNA_data %>%
  mutate(
    valid_viewable_imps =
      case_when(
        platform == 'Twitter' & channel == 'Display' ~ valid_and_inview_3sec_cumulative,
        platform == 'LinkedIn' ~ x2_sec_video_in_view_impressions,
        TRUE ~ valid_and_viewable
      ),
    avoc_imps = case_when(
    TRUE ~  player_vis_and_aud_on_complete_sum 
    ),
    quarter = lubridate::quarter(date),
    channel = case_when(
      channel == 'DISPLAY' ~ 'STATIC',
      TRUE ~ channel
    )
  ) %>% 
  group_by(olive_plan_name,date, channel, platform) %>% 
  summarize(
    valid_viewable_imps = sum(valid_viewable_imps),
    avoc_imps = sum(avoc_imps),
    impressions = sum(impressions_analyzed),
    quarter = max(quarter)
  ) %>% 
  arrange(date,olive_plan_name)

Social_2022_SummarizedNA_date %>% 
  filter(platform == 'Twitter') %>% 
  filter(channel == 'DISPLAY')

Social_2022_SummarizedNA_date %>% 
  group_by(quarter, platform, channel) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `AVOC Rate` = sum(avoc_imps)/sum(impressions)
  ) %>%     
  kbl() %>% 
 kable_material(c("striped", "hover","condensed","responsive"),full_width = F,fixed_thead = T)
write_sheet(Social_2022_SummarizedNA_date, ss = 'https://docs.google.com/spreadsheets/d/1OT7zoqbcnadQsR8ehHbvMfKn8mieWYa-dVaCOHFdhHI/edit#gid=1469978055',
           sheet = 'Aggregated Data NA')

Additional Twitter Level Information


Social_2022_rawNA_DV30_data <- readr::read_csv("DATA/Social Query 2022 NA DV360.csv") %>%
  clean_names() %>% 
  filter(date < '2022-07-01') %>% 
  mutate(
    olive_placement_id = as.character(olive_placement_id)
  )
  

Social_2022_processedTwitterNA_data <-
Social_2022_processedNA_data %>% select(olive_plan_name,olive_placement_id,date,impressions_analyzed) %>%
  mutate(
    quarter = quarter(date)
  ) %>% 
  group_by(olive_plan_name,olive_placement_id,date,quarter) %>% 
  summarize(
    impressions_analyzed = sum(impressions_analyzed)
  ) %>%
  ungroup() %>% 
  left_join(Social_2022_rawNA_DV30_data, by = c('olive_placement_id','olive_plan_name', 'date')) %>% 
  filter(!is.na(olive_plan_name)) %>% 
  filter(platform == 'Twitter - Official' & olive_placement_type == 'Standard Banner (BAN)') %>% 
  mutate(
    impressions = impressions_analyzed,
    platform = 'Twitter',
    channel = 'STATIC',
    avoc_imps = NA,
    valid_viewable_imps = valid_and_viewable_impressions,
    Time = NA
  ) %>% 
  select(olive_plan_name,   date,   channel,    platform,   valid_viewable_imps,    avoc_imps,  impressions,    Time,   quarter)

Social_2022_SummarizedNA_data_updatedTwitter <-
Social_2022_SummarizedNA_date %>%
  filter(!(platform == 'Twitter' & channel == 'STATIC')) %>%   
  bind_rows (Social_2022_processedTwitterNA_data)

Update Twitter Information



write_sheet(Social_2022_SummarizedNA_data_updatedTwitter, ss = 'https://docs.google.com/spreadsheets/d/1OT7zoqbcnadQsR8ehHbvMfKn8mieWYa-dVaCOHFdhHI/edit#gid=1469978055',
           sheet = 'Additional NA Twitter Data')

H1 2022 Meta Ask (Top 7 Campaigns)

Data Read-in

Google Sheet with BQ Results

Social_2022_raw_Top7Meta_data = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1RU0QGpnn7FHtZaZfCA22kCcGpEeY1vLh0pHkJ9DEZfU/edit#gid=1752984558") %>% 
  clean_names() %>% 
  filter(impressions_analyzed > 10)


Social_2022_MOAT_lookup = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1nDXokcwtq_J8A6Uh7zAxsWW16MtcEC_1EiLstz4WGig/edit#gid=1782979571") %>% 
  clean_names() %>% 
  filter(social == 'Y')

Social_2022_raw_Top7Meta_data %>% 
  group_by(platform) %>% 
  summarize(dist = n())

Social_2022_processed_Top7Meta_data <-
  Social_2022_raw_Top7Meta_data %>%
  filter(platform == 'Facebook') %>% 
  select(
    olive_plan_id:opid, impressions_analyzed,
    x2_sec_video_in_view_impressions, in_view_impressions, fully_on_screen_3sec_cumulative, player_vis_and_aud_on_complete_sum,
    valid_and_avoc, valid_and_viewable, valid_and_viewable_gm, valid_and_fully_on_screen_3sec_cumulative, valid_and_inview_3sec_cumulative
  ) %>%
  rename(platform_old = platform
         ) %>% 
  left_join(Social_2022_MOAT_lookup, by = 'brand_id'
  ) %>% 
  mutate(
    olive_plan_id = as.character(olive_plan_id),
    olive_placement_id = as.character(olive_placement_id),
    brand_id = as.character(brand_id) ,
    opid = as.character(opid)
  )

#Check Missing Data

Social_2022_processed_Top7Meta_data %>% 
  skim()
-- Data Summary ------------------------
                           Values    
Name                       Piped data
Number of rows             4273      
Number of columns          24        
_______________________              
Column type frequency:               
  character                13        
  numeric                  10        
  POSIXct                  1         
________________________             
Group variables            None      

-- Variable type: character -----------------------------------------------------------------------------------------------------------------------
# A tibble: 13 x 8
   skim_variable        n_missing complete_rate   min   max empty n_unique whitespace
 * <chr>                    <int>         <dbl> <int> <int> <int>    <int>      <int>
 1 olive_plan_id                0             1     4     5     0        7          0
 2 olive_plan_name              0             1    43    68     0        7          0
 3 platform_old                 0             1     8     8     0        1          0
 4 olive_placement_id           0             1     7     7     0      124          0
 5 olive_placement_type         0             1    12    21     0        3          0
 6 brand_id                     0             1     7     7     0        5          0
 7 opid                         0             1     7     7     0      124          0
 8 dataset_name                 0             1    32    51     0        5          0
 9 region                       0             1     2     2     0        1          0
10 channel                      0             1     5     7     0        2          0
11 social                       0             1     1     1     0        1          0
12 platform                     0             1     8     9     0        2          0
13 media_type                   0             1     5    13     0        3          0

-- Variable type: numeric -------------------------------------------------------------------------------------------------------------------------
# A tibble: 10 x 11
   skim_variable                             n_missing complete_rate    mean      sd    p0    p25     p50     p75    p100 hist   
 * <chr>                                         <int>         <dbl>   <dbl>   <dbl> <dbl>  <dbl>   <dbl>   <dbl>   <dbl> <chr>  
 1 impressions_analyzed                              0         1     172452. 310322.    11   5698  39769  190414  2458527 "▇▁▁▁▁"
 2 x2_sec_video_in_view_impressions                473         0.889  14846.  25037.     0    539   3280.  19332.  255752 "▇▁▁▁▁"
 3 in_view_impressions                            3800         0.111 320224. 223119.  5632 137023 269619  443635  1111029 "▇▆▃▁▁"
 4 fully_on_screen_3sec_cumulative                4273         0        NaN      NA     NA     NA     NA      NA       NA " "    
 5 player_vis_and_aud_on_complete_sum              473         0.889   1255.   2535.     0     24    323    1200.   24082 "▇▁▁▁▁"
 6 valid_and_avoc                                  473         0.889   1255.   2535.     0     24    323    1200.   24082 "▇▁▁▁▁"
 7 valid_and_viewable                                0         1      48649. 123451.     0    697   4979   28720  1111029 "▇▁▁▁▁"
 8 valid_and_viewable_gm                          3800         0.111 667387. 469658. 11014 294637 561180  924167  2296318 "▇▆▃▁▁"
 9 valid_and_fully_on_screen_3sec_cumulative      4273         0        NaN      NA     NA     NA     NA      NA       NA " "    
10 valid_and_inview_3sec_cumulative                473         0.889  10966.  17556.     0    410   2431   15043.  198035 "▇▁▁▁▁"

-- Variable type: POSIXct -------------------------------------------------------------------------------------------------------------------------
# A tibble: 1 x 7
  skim_variable n_missing complete_rate min                 max                 median              n_unique
* <chr>             <int>         <dbl> <dttm>              <dttm>              <dttm>                 <int>
1 date                  0             1 2020-04-27 00:00:00 2021-06-30 00:00:00 2020-12-09 00:00:00      197

Plot Impression Level Data


p = Social_2022_processed_Top7Meta_data %>% 
  ggplot() +
    aes(x = date, y = impressions_analyzed, color = olive_plan_name, fill = olive_plan_name) +
  xlab("Date") +
  ylab("Impressions") +
  ggtitle("Meta Impressions by Campaign") + #fix size
geom_bar(stat = 'identity') +
  theme_bw() +
  theme(
     plot.title = element_text(size=22, hjust = 0.5),
     axis.title.y = element_blank()
  )

p


ggplotly(p)

Calculate V/V and AVOC Rates by Platform

Social_2022_Summarized_Top7Meta_data <-
Social_2022_processed_Top7Meta_data %>%
  mutate(
    valid_viewable_imps =
      case_when(
        platform == 'Twitter' & channel == 'Display' ~ valid_and_inview_3sec_cumulative,
        platform == 'LinkedIn' ~ x2_sec_video_in_view_impressions,
        TRUE ~ valid_and_viewable
      ),
    avoc_imps = case_when(
    is.na(player_vis_and_aud_on_complete_sum) ~ 0,
    TRUE ~  player_vis_and_aud_on_complete_sum 
    ),
    quarter = lubridate::quarter(date),
    channel = case_when(
      channel == 'DISPLAY' ~ 'STATIC',
      TRUE ~ channel
    )
  ) %>% 
  group_by(olive_plan_name,date, channel, platform) %>% 
  summarize(
    valid_viewable_imps = sum(valid_viewable_imps),
    avoc_imps = sum(avoc_imps),
    impressions = sum(impressions_analyzed),
    quarter = max(quarter)
  ) %>% 
  arrange(date,olive_plan_name)
`summarise()` has grouped output by 'olive_plan_name', 'date', 'channel'. You can override using the `.groups` argument.

Summarized Table - Campaign

Sumif Documentation


Social_2022_Summarized_Top7Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  ) %>%     
  kbl() %>% 
 kable_material(c("striped", "hover","condensed","responsive"),full_width = F,fixed_thead = T)
olive_plan_name Valid and Viewable Rate Video vs Static
Chromebook NA Q4 2020 United States - Brand 0.14829 1.0000
Google FI Q2 2021 United States - Brand Demand Gen 0.11471 1.0000
Grow with Google NA Q2 2021 United States - Career Certifications 0.19344 1.0000
Pixel US Ramble Q1 2021 North America - Brand 0.41812 0.0666
Pixel US Ramble Q2 2021 North America - Brand 0.46276 0.0000
YouTube NA 2020 Q4 2020 United States - Longtail_Brand Voice_#669762 0.12791 1.0000
YouTube NA Q2 2020 United States - Longtail_James Charles_#669762 0.11704 1.0000

Write to Google Sheets


Social_2022_Summarized_Top7Meta_data_Table <-
Social_2022_Summarized_Top7Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  )
write_sheet(Social_2022_Summarized_Top7Meta_data_Table, ss = 'https://docs.google.com/spreadsheets/d/1RU0QGpnn7FHtZaZfCA22kCcGpEeY1vLh0pHkJ9DEZfU/edit#gid=1752984558',
           sheet = 'Aggregated Data From R')

H1 2022 Meta Ask (Additional 2 Campaigns)

Data Read-in

Google Sheet with BQ Results

Social_2022_raw_Add2Meta_data = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1YdkESoYlv6fzpQY84j4AB7Lw5qfW8XKPmh2B0gApYJA/edit#gid=1980208140") %>% 
  clean_names() %>% 
  filter(impressions_analyzed > 10)
√ Reading from Meta Ask Two Additional Campaigns.
√ Range Results.

Social_2022_raw_Add2Meta_data %>% 
  group_by(platform) %>% 
  summarize(dist = n())

Social_2022_processed_Add2Meta_data <-
  Social_2022_raw_Add2Meta_data %>%
  filter(platform == 'Facebook'|platform == 'Facebook/Instagram') %>% 
  select(
    olive_plan_id:opid, impressions_analyzed,
    x2_sec_video_in_view_impressions, in_view_impressions, fully_on_screen_3sec_cumulative, player_vis_and_aud_on_complete_sum,
    valid_and_avoc, valid_and_viewable, valid_and_viewable_gm, valid_and_fully_on_screen_3sec_cumulative, valid_and_inview_3sec_cumulative
  ) %>%
  rename(platform_old = platform
         ) %>% 
  left_join(Social_2022_MOAT_lookup, by = 'brand_id'
  ) %>% 
  mutate(
    olive_plan_id = as.character(olive_plan_id),
    olive_placement_id = as.character(olive_placement_id),
    brand_id = as.character(brand_id) ,
    opid = as.character(opid)
  )

#Check Missing Data

Social_2022_processed_Add2Meta_data %>% 
  skim()
-- Data Summary ------------------------
                           Values    
Name                       Piped data
Number of rows             8532      
Number of columns          24        
_______________________              
Column type frequency:               
  character                13        
  numeric                  10        
  POSIXct                  1         
________________________             
Group variables            None      

-- Variable type: character -------------------------------------------------------------------------------------------------------------
# A tibble: 13 x 8
   skim_variable        n_missing complete_rate   min   max empty n_unique whitespace
 * <chr>                    <int>         <dbl> <int> <int> <int>    <int>      <int>
 1 olive_plan_id                0             1     5     5     0        2          0
 2 olive_plan_name              0             1    53    59     0        2          0
 3 platform_old                 0             1     8    18     0        2          0
 4 olive_placement_id           0             1     7     7     0      124          0
 5 olive_placement_type         0             1    15    21     0        2          0
 6 brand_id                     0             1     7     7     0        7          0
 7 opid                         0             1     7     7     0      124          0
 8 dataset_name                 0             1    32    51     0        7          0
 9 region                       0             1     2     2     0        1          0
10 channel                      0             1     5     7     0        2          0
11 social                       0             1     1     1     0        1          0
12 platform                     0             1     8     9     0        2          0
13 media_type                   0             1     5    15     0        4          0

-- Variable type: numeric ---------------------------------------------------------------------------------------------------------------
# A tibble: 10 x 11
   skim_variable                             n_missing complete_rate    mean      sd    p0   p25    p50     p75    p100 hist   
 * <chr>                                         <int>         <dbl>   <dbl>   <dbl> <dbl> <dbl>  <dbl>   <dbl>   <dbl> <chr>  
 1 impressions_analyzed                              0         1     137611. 381278.    11  855.  9138.  89822  4318856 "▇▁▁▁▁"
 2 x2_sec_video_in_view_impressions               2422         0.716  13879.  42668.     0   68    688.   6622.  527738 "▇▁▁▁▁"
 3 in_view_impressions                            6114         0.283  87725. 229315.     1 1381.  8528   59999  2078543 "▇▁▁▁▁"
 4 fully_on_screen_3sec_cumulative                8532         0        NaN      NA     NA   NA     NA      NA       NA " "    
 5 player_vis_and_aud_on_complete_sum             2418         0.717   1066.   3791.     0    0     29     534.   70156 "▇▁▁▁▁"
 6 valid_and_avoc                                 2418         0.717   1066.   3791.     0    0     29     534.   70156 "▇▁▁▁▁"
 7 valid_and_viewable                                0         1      34801. 131568.     0  138   1564.  12955  2078543 "▇▁▁▁▁"
 8 valid_and_viewable_gm                          6114         0.283 179529. 437351.     5 2276  21661  143106. 3800796 "▇▁▁▁▁"
 9 valid_and_fully_on_screen_3sec_cumulative      8532         0        NaN      NA     NA   NA     NA      NA       NA " "    
10 valid_and_inview_3sec_cumulative               2418         0.717   9809.  29091.     0   50    528.   4958   365068 "▇▁▁▁▁"

-- Variable type: POSIXct ---------------------------------------------------------------------------------------------------------------
# A tibble: 1 x 7
  skim_variable n_missing complete_rate min                 max                 median              n_unique
* <chr>             <int>         <dbl> <dttm>              <dttm>              <dttm>                 <int>
1 date                  0             1 2021-10-21 00:00:00 2022-01-01 00:00:00 2021-12-05 00:00:00       72

Plot Impression Level Data


p = Social_2022_processed_Add2Meta_data %>% 
  ggplot() +
    aes(x = date, y = impressions_analyzed, color = olive_plan_name, fill = olive_plan_name) +
  xlab("Date") +
  ylab("Impressions") +
  ggtitle("Meta Impressions by Campaign") + #fix size
geom_bar(stat = 'identity') +
  theme_bw() +
  theme(
     plot.title = element_text(size=22, hjust = 0.5),
     axis.title.y = element_blank()
  )

p


ggplotly(p)

Calculate V/V and AVOC Rates by Platform

Social_2022_Summarized_Add2Meta_data <-
Social_2022_processed_Add2Meta_data %>%
  mutate(
    valid_viewable_imps =
      case_when(
        platform == 'Twitter' & channel == 'Display' ~ valid_and_inview_3sec_cumulative,
        platform == 'LinkedIn' ~ x2_sec_video_in_view_impressions,
        TRUE ~ valid_and_viewable
      ),
    avoc_imps = case_when(
    is.na(player_vis_and_aud_on_complete_sum) ~ 0,
    TRUE ~  player_vis_and_aud_on_complete_sum 
    ),
    quarter = lubridate::quarter(date),
    channel = case_when(
      channel == 'DISPLAY' ~ 'STATIC',
      TRUE ~ channel
    )
  ) %>% 
  group_by(olive_plan_name,date, channel, platform) %>% 
  summarize(
    valid_viewable_imps = sum(valid_viewable_imps),
    avoc_imps = sum(avoc_imps),
    impressions = sum(impressions_analyzed),
    quarter = max(quarter)
  ) %>% 
  arrange(date,olive_plan_name)
`summarise()` has grouped output by 'olive_plan_name', 'date', 'channel'. You can override using the `.groups` argument.

Summarized Table - Campaign

Sumif Documentation


Social_2022_Summarized_Add2Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  ) %>%     
  kbl() %>% 
 kable_material(c("striped", "hover","condensed","responsive"),full_width = F,fixed_thead = T)
olive_plan_name Valid and Viewable Rate Video vs Static
Android NA 2021 Q4 2021 North America - Samsung D3 Campaign 0.32632 0.42451
GSA NA 2021 Q4 2021 Global - iGA Futura Brand Q4 2021 0.16121 0.81153

Write to Google Sheets


Social_2022_Summarized_Add2Meta_data_Table <-
Social_2022_Summarized_Add2Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  )
write_sheet(Social_2022_Summarized_Add2Meta_data_Table, ss = 'https://docs.google.com/spreadsheets/d/1RU0QGpnn7FHtZaZfCA22kCcGpEeY1vLh0pHkJ9DEZfU/edit#gid=1752984558',
           sheet = 'Additional 2 Campaign Data From R')
√ Writing to H1 2022 Meta Ask v2.
√ Writing to sheet Additional 2 Campaign Data From R.
---
title: "01_Data_Readin_Summary_2022NA_Social"
author: "Darshan Patel"
date: "`r Sys.Date()`"
output:
  html_notebook:
    toc: yes
    toc_float: yes
    number_sections: no
    theme: cerulean
    highlight: zenburn
    fig_width: 7
    fig_height: 6
    fig_caption: yes
    df_print: paged
---

```{r setup, include=FALSE}
#note: normally include = FALSE for this
knitr::opts_chunk$set(echo = TRUE) #all code chunks by default will be shown
options(knitr.table.format = "html") #table format
options(digits=5) #set digits in numbers
options(scipen = 100) #digits show before using scientific notation
knitr::opts_chunk$set(tidy.opts=list(width.cutoff=80), tidy=TRUE)
#install.packages("pacman")
library(pacman) #for quick load/install of packages
p_load(
  dplyr, readr, tidyverse,forcats,purrr,lubridate, # reading in data
  janitor, sqldf,googlesheets4, # additional tools for dealing with data
  skimr,
  rqdatatable, #
  splitstackshape,stringr, #string related libraries
  kableExtra, ggplot2, plotly,echarts4r,ggpubr,scales,RColorBrewer,ggthemes, #for visualization of data
  reticulate #for using python
)
```

## Data Read-in

<https://docs.google.com/spreadsheets/d/1nDXokcwtq_J8A6Uh7zAxsWW16MtcEC_1EiLstz4WGig/edit#gid=1782979571>

```{r}
Social_2022_rawNA_data <- readr::read_csv("DATA/Social Query 2022 NA Campaigns.csv") %>%
  clean_names() %>% 
  filter(date < '2022-07-01') %>% 
  filter(impressions_analyzed > 10)


Social_2022_rawNA_lookup <- readr::read_csv("DATA/CampaignLookup.csv") %>%
  clean_names()

Social_2022_MOAT_lookup = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1nDXokcwtq_J8A6Uh7zAxsWW16MtcEC_1EiLstz4WGig/edit#gid=1782979571") %>% 
  clean_names() %>% 
  filter(social == 'Y')

```

## Summary Stats

### Placement types distinct

```{r}

Social_2022_rawNA_data %>%
  group_by(platform,olive_placement_type) %>% 
  summarize (counts = n())

Social_2022_rawNA_data %>%
  group_by(platform,brand_id) %>% 
  summarize (counts = n()) %>%
  left_join(Social_2022_MOAT_lookup, by = 'brand_id')

```

```{r}

Social_2022_processedNA_data <-
  Social_2022_rawNA_data %>%
  select(
    olive_plan_id:opid, impressions_analyzed,
    x2_sec_video_in_view_impressions, in_view_impressions, fully_on_screen_3sec_cumulative, player_vis_and_aud_on_complete_sum,
    valid_and_avoc, valid_and_viewable, valid_and_viewable_gm, valid_and_fully_on_screen_3sec_cumulative, valid_and_inview_3sec_cumulative
  ) %>%
  rename(platform_old = platform
         ) %>% 
  left_join(Social_2022_MOAT_lookup, by = 'brand_id'
  ) %>% 
  mutate(
    olive_plan_id = as.character(olive_plan_id),
    olive_placement_id = as.character(olive_placement_id),
    brand_id = as.character(brand_id) ,
    opid = as.character(opid)
  )

#Look at global moat

Social_2022_processedNA_data %>% 
  skim()
```

### Plot Impression Level Data

```{r, fig.height= 10, fig.width= 15}

p = Social_2022_processedNA_data %>% 
  ggplot() +
    aes(x = date, y = impressions_analyzed, color = olive_plan_name, fill = olive_plan_name) +
  xlab("Date") +
  ylab("Impressions") +
  ggtitle("Impressions by Campaign") + #fix size
geom_bar(stat = 'identity') +
  theme_bw() +
  theme(
     plot.title = element_text(size=22, hjust = 0.5),
     axis.title.y = element_blank()
  )

p

ggplotly(p)
```

### Calculate V/V and AVOC Rates by Platform

```{r}
Social_2022_SummarizedNA_date <-
Social_2022_processedNA_data %>%
  mutate(
    valid_viewable_imps =
      case_when(
        platform == 'Twitter' & channel == 'Display' ~ valid_and_inview_3sec_cumulative,
        platform == 'LinkedIn' ~ x2_sec_video_in_view_impressions,
        TRUE ~ valid_and_viewable
      ),
    avoc_imps = case_when(
    TRUE ~  player_vis_and_aud_on_complete_sum 
    ),
    quarter = lubridate::quarter(date),
    channel = case_when(
      channel == 'DISPLAY' ~ 'STATIC',
      TRUE ~ channel
    )
  ) %>% 
  group_by(olive_plan_name,date, channel, platform) %>% 
  summarize(
    valid_viewable_imps = sum(valid_viewable_imps),
    avoc_imps = sum(avoc_imps),
    impressions = sum(impressions_analyzed),
    quarter = max(quarter)
  ) %>% 
  arrange(date,olive_plan_name)
```

```{r}

Social_2022_SummarizedNA_date %>% 
  filter(platform == 'Twitter') %>% 
  filter(channel == 'DISPLAY')

```

```{r}

Social_2022_SummarizedNA_date %>% 
  group_by(quarter, platform, channel) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `AVOC Rate` = sum(avoc_imps)/sum(impressions)
  ) %>%     
  kbl() %>% 
 kable_material(c("striped", "hover","condensed","responsive"),full_width = F,fixed_thead = T)
```

```{r}
write_sheet(Social_2022_SummarizedNA_date, ss = 'https://docs.google.com/spreadsheets/d/1OT7zoqbcnadQsR8ehHbvMfKn8mieWYa-dVaCOHFdhHI/edit#gid=1469978055',
           sheet = 'Aggregated Data NA')
```

### Additional Twitter Level Information

```{r}

Social_2022_rawNA_DV30_data <- readr::read_csv("DATA/Social Query 2022 NA DV360.csv") %>%
  clean_names() %>% 
  filter(date < '2022-07-01') %>% 
  mutate(
    olive_placement_id = as.character(olive_placement_id)
  )
  

Social_2022_processedTwitterNA_data <-
Social_2022_processedNA_data %>% select(olive_plan_name,olive_placement_id,date,impressions_analyzed) %>%
  mutate(
    quarter = quarter(date)
  ) %>% 
  group_by(olive_plan_name,olive_placement_id,date,quarter) %>% 
  summarize(
    impressions_analyzed = sum(impressions_analyzed)
  ) %>%
  ungroup() %>% 
  left_join(Social_2022_rawNA_DV30_data, by = c('olive_placement_id','olive_plan_name', 'date')) %>% 
  filter(!is.na(olive_plan_name)) %>% 
  filter(platform == 'Twitter - Official' & olive_placement_type == 'Standard Banner (BAN)') %>% 
  mutate(
    impressions = impressions_analyzed,
    platform = 'Twitter',
    channel = 'STATIC',
    avoc_imps = NA,
    valid_viewable_imps = valid_and_viewable_impressions,
    Time = NA
  ) %>% 
  select(olive_plan_name,	date,	channel,	platform,	valid_viewable_imps,	avoc_imps,	impressions,	Time,	quarter)

```

```{r}

Social_2022_SummarizedNA_data_updatedTwitter <-
Social_2022_SummarizedNA_date %>%
  filter(!(platform == 'Twitter' & channel == 'STATIC')) %>%   
  bind_rows (Social_2022_processedTwitterNA_data)

```

### Update Twitter Information

```{r}


write_sheet(Social_2022_SummarizedNA_data_updatedTwitter, ss = 'https://docs.google.com/spreadsheets/d/1OT7zoqbcnadQsR8ehHbvMfKn8mieWYa-dVaCOHFdhHI/edit#gid=1469978055',
           sheet = 'Additional NA Twitter Data')

```

------------------------------------------------------------------------

# H1 2022 Meta Ask (Top 7 Campaigns)

## Data Read-in

[Google Sheet with BQ Results](https://docs.google.com/spreadsheets/d/1RU0QGpnn7FHtZaZfCA22kCcGpEeY1vLh0pHkJ9DEZfU/edit#gid=1752984558)

```{r}
Social_2022_raw_Top7Meta_data = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1RU0QGpnn7FHtZaZfCA22kCcGpEeY1vLh0pHkJ9DEZfU/edit#gid=1752984558") %>% 
  clean_names() %>% 
  filter(impressions_analyzed > 10)


Social_2022_MOAT_lookup = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1nDXokcwtq_J8A6Uh7zAxsWW16MtcEC_1EiLstz4WGig/edit#gid=1782979571") %>% 
  clean_names() %>% 
  filter(social == 'Y')
```

```{r}

Social_2022_raw_Top7Meta_data %>% 
  group_by(platform) %>% 
  summarize(dist = n())
```

```{r}

Social_2022_processed_Top7Meta_data <-
  Social_2022_raw_Top7Meta_data %>%
  filter(platform == 'Facebook') %>% 
  select(
    olive_plan_id:opid, impressions_analyzed,
    x2_sec_video_in_view_impressions, in_view_impressions, fully_on_screen_3sec_cumulative, player_vis_and_aud_on_complete_sum,
    valid_and_avoc, valid_and_viewable, valid_and_viewable_gm, valid_and_fully_on_screen_3sec_cumulative, valid_and_inview_3sec_cumulative
  ) %>%
  rename(platform_old = platform
         ) %>% 
  left_join(Social_2022_MOAT_lookup, by = 'brand_id'
  ) %>% 
  mutate(
    olive_plan_id = as.character(olive_plan_id),
    olive_placement_id = as.character(olive_placement_id),
    brand_id = as.character(brand_id) ,
    opid = as.character(opid)
  )

#Check Missing Data

Social_2022_processed_Top7Meta_data %>% 
  skim()

```

### Plot Impression Level Data

```{r, fig.height= 10, fig.width= 20}

p = Social_2022_processed_Top7Meta_data %>% 
  ggplot() +
    aes(x = date, y = impressions_analyzed, color = olive_plan_name, fill = olive_plan_name) +
  xlab("Date") +
  ylab("Impressions") +
  ggtitle("Meta Impressions by Campaign") + #fix size
geom_bar(stat = 'identity') +
  theme_bw() +
  theme(
     plot.title = element_text(size=22, hjust = 0.5),
     axis.title.y = element_blank()
  )

p

ggplotly(p)
```

### Calculate V/V and AVOC Rates by Platform

```{r}
Social_2022_Summarized_Top7Meta_data <-
Social_2022_processed_Top7Meta_data %>%
  mutate(
    valid_viewable_imps =
      case_when(
        platform == 'Twitter' & channel == 'Display' ~ valid_and_inview_3sec_cumulative,
        platform == 'LinkedIn' ~ x2_sec_video_in_view_impressions,
        TRUE ~ valid_and_viewable
      ),
    avoc_imps = case_when(
    is.na(player_vis_and_aud_on_complete_sum) ~ 0,
    TRUE ~  player_vis_and_aud_on_complete_sum 
    ),
    quarter = lubridate::quarter(date),
    channel = case_when(
      channel == 'DISPLAY' ~ 'STATIC',
      TRUE ~ channel
    )
  ) %>% 
  group_by(olive_plan_name,date, channel, platform) %>% 
  summarize(
    valid_viewable_imps = sum(valid_viewable_imps),
    avoc_imps = sum(avoc_imps),
    impressions = sum(impressions_analyzed),
    quarter = max(quarter)
  ) %>% 
  arrange(date,olive_plan_name)
```

### Summarized Table - Campaign

[Sumif Documentation](https://stackoverflow.com/questions/23528862/summarize-all-group-values-and-a-conditional-subset-in-the-same-call)

```{r}

Social_2022_Summarized_Top7Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  ) %>%     
  kbl() %>% 
 kable_material(c("striped", "hover","condensed","responsive"),full_width = F,fixed_thead = T)
```

### Write to Google Sheets

```{r}

Social_2022_Summarized_Top7Meta_data_Table <-
Social_2022_Summarized_Top7Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  )

```

```{r}
write_sheet(Social_2022_Summarized_Top7Meta_data_Table, ss = 'https://docs.google.com/spreadsheets/d/1RU0QGpnn7FHtZaZfCA22kCcGpEeY1vLh0pHkJ9DEZfU/edit#gid=1752984558',
           sheet = 'Aggregated Data From R')
```



------------------------------------------------------------------------

# H1 2022 Meta Ask (Additional 2 Campaigns)

## Data Read-in

[Google Sheet with BQ Results](https://docs.google.com/spreadsheets/d/19MvBKXcBtv9Whn4uxarKGvuLDoEgsfrLrITPbt2jfro/edit#gid=372594310)

```{r}
Social_2022_raw_Add2Meta_data = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1YdkESoYlv6fzpQY84j4AB7Lw5qfW8XKPmh2B0gApYJA/edit#gid=1980208140") %>% 
  clean_names() %>% 
  filter(impressions_analyzed > 10)


Social_2022_MOAT_lookup = googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1nDXokcwtq_J8A6Uh7zAxsWW16MtcEC_1EiLstz4WGig/edit#gid=1782979571") %>% 
  clean_names() %>% 
  filter(social == 'Y')
```

```{r}

Social_2022_raw_Add2Meta_data %>% 
  group_by(platform) %>% 
  summarize(dist = n())
```

```{r}

Social_2022_processed_Add2Meta_data <-
  Social_2022_raw_Add2Meta_data %>%
  filter(platform == 'Facebook'|platform == 'Facebook/Instagram') %>% 
  select(
    olive_plan_id:opid, impressions_analyzed,
    x2_sec_video_in_view_impressions, in_view_impressions, fully_on_screen_3sec_cumulative, player_vis_and_aud_on_complete_sum,
    valid_and_avoc, valid_and_viewable, valid_and_viewable_gm, valid_and_fully_on_screen_3sec_cumulative, valid_and_inview_3sec_cumulative
  ) %>%
  rename(platform_old = platform
         ) %>% 
  left_join(Social_2022_MOAT_lookup, by = 'brand_id'
  ) %>% 
  mutate(
    olive_plan_id = as.character(olive_plan_id),
    olive_placement_id = as.character(olive_placement_id),
    brand_id = as.character(brand_id) ,
    opid = as.character(opid)
  )

#Check Missing Data

Social_2022_processed_Add2Meta_data %>% 
  skim()

```

### Plot Impression Level Data

```{r, fig.height= 10, fig.width= 20}

p = Social_2022_processed_Add2Meta_data %>% 
  ggplot() +
    aes(x = date, y = impressions_analyzed, color = olive_plan_name, fill = olive_plan_name) +
  xlab("Date") +
  ylab("Impressions") +
  ggtitle("Meta Impressions by Campaign") + #fix size
geom_bar(stat = 'identity') +
  theme_bw() +
  theme(
     plot.title = element_text(size=22, hjust = 0.5),
     axis.title.y = element_blank()
  )

p

ggplotly(p)
```

### Calculate V/V and AVOC Rates by Platform

```{r}
Social_2022_Summarized_Add2Meta_data <-
Social_2022_processed_Add2Meta_data %>%
  mutate(
    valid_viewable_imps =
      case_when(
        platform == 'Twitter' & channel == 'Display' ~ valid_and_inview_3sec_cumulative,
        platform == 'LinkedIn' ~ x2_sec_video_in_view_impressions,
        TRUE ~ valid_and_viewable
      ),
    avoc_imps = case_when(
    is.na(player_vis_and_aud_on_complete_sum) ~ 0,
    TRUE ~  player_vis_and_aud_on_complete_sum 
    ),
    quarter = lubridate::quarter(date),
    channel = case_when(
      channel == 'DISPLAY' ~ 'STATIC',
      TRUE ~ channel
    )
  ) %>% 
  group_by(olive_plan_name,date, channel, platform) %>% 
  summarize(
    valid_viewable_imps = sum(valid_viewable_imps),
    avoc_imps = sum(avoc_imps),
    impressions = sum(impressions_analyzed),
    quarter = max(quarter)
  ) %>% 
  arrange(date,olive_plan_name)
```

### Summarized Table - Campaign

[Sumif Documentation](https://stackoverflow.com/questions/23528862/summarize-all-group-values-and-a-conditional-subset-in-the-same-call)

```{r}

Social_2022_Summarized_Add2Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  ) %>%     
  kbl() %>% 
 kable_material(c("striped", "hover","condensed","responsive"),full_width = F,fixed_thead = T)
```

### Write to Google Sheets

```{r}

Social_2022_Summarized_Add2Meta_data_Table <-
Social_2022_Summarized_Add2Meta_data %>% 
  group_by(olive_plan_name) %>% 
  summarize(
    `Valid and Viewable Rate` = sum(valid_viewable_imps)/sum(impressions),
    `Video vs Static` = 1 - sum(impressions[channel =='STATIC'])/sum(impressions)
  )

```

```{r}
write_sheet(Social_2022_Summarized_Add2Meta_data_Table, ss = 'https://docs.google.com/spreadsheets/d/1RU0QGpnn7FHtZaZfCA22kCcGpEeY1vLh0pHkJ9DEZfU/edit#gid=1752984558',
           sheet = 'Additional 2 Campaign Data From R')
```


